##############################################
# REPLICATION FILES FOR KING, LUCAS, NIELSEN #
##############################################

# Install and load latest version of the MatchingFrontier
library(devtools) 
install_github('ChristopherLucas/MatchingFrontier@148a33bce48e442160c873ea4e0528df54964dac')

library(MatchingFrontier)
library(scales)
library(proxy)
library(igraph)

###########################
###########################
# Algorithm Illustrations #
###########################
###########################

######
# L1 #
######

get.L1 <- function(control.freqs, treated.freqs){
    l1 <- sum(abs(control.freqs / sum(control.freqs) - treated.freqs / sum(treated.freqs))) * .5
    return(l1)
}

remove.from.bin <- function(control.freqs, treated.freqs){
    singles <- treated.freqs == 0
    if(sum(control.freqs[singles]) > 0){
        singletons <- which(treated.freqs == 0)
        print(singletons)
        singleton <- singletons[which(control.freqs[singletons] == max(control.freqs[singletons]))]
        return(singleton)
    }
    diffs <- control.freqs / sum(control.freqs) - treated.freqs / sum(treated.freqs)
    print(diffs)
    return(which(diffs == max(diffs))[1])
}

test.drop <- function(prune, control.freqs, treated.freqs){
    new.control.freqs <- control.freqs
    new.control.freqs[prune] <- new.control.freqs[prune] - 1
    old.L1 <- get.L1(control.freqs, treated.freqs)
    new.L1 <- get.L1(new.control.freqs, treated.freqs)
    if(new.L1 < old.L1){TRUE}
    else{FALSE}
}

get.frontier <- function(control.freqs, treated.freqs){                   
    L1s <- get.L1(control.freqs, treated.freqs)
    order.pruned <- c()

    while(1){
        prune <- remove.from.bin(control.freqs, treated.freqs)
        
        if(test.drop(prune, control.freqs, treated.freqs)){
            order.pruned <- c(order.pruned, prune)
            control.freqs[prune] <- control.freqs[prune] - 1
            L1s <- c(L1s, get.L1(control.freqs, treated.freqs))
        }
        else{break}
    }
    return(list(L1s = L1s, order.pruned = order.pruned))
}

make.plots <- function(frontier, control.freqs, treated.freqs){
    names <- c('\t\ \ \ \ Bin 1', NA, '\t\ \ \ \ Bin 2', NA, '\t\ \ \ \ Bin 3', NA, '\t\ \ \ \ Bin 4',
               NA, '\t\ \ \ \ Bin 5', NA)
    plots <- list()
    count <- 0
    for(i in c(frontier$order.pruned, NA)){
        control.dist <- control.freqs / sum(control.freqs)
        treated.dist <- treated.freqs / sum(treated.freqs)

        new.dist <- (control.freqs[i] - 1) / (sum(control.freqs) - 1)
        dif <- control.dist[i] - new.dist
        control.dist[i] <- new.dist

        drop.diff <- rep(0, 5)
        drop.diff[i] <- dif
        
        control.freqs.plot <- c(control.dist, rep(0, 5))[c(1,6,2,7,3,8,4,9,5,10)]
        drop.diff <- c(drop.diff, rep(0, 5))[c(1,6,2,7,3,8,4,9,5,10)]
        treated.freqs.plot <- c(treated.dist, rep(0, 5))[c(6,1,7,2,8,3,9,4,10,5)]
        
        plot.mat <- rbind(control.freqs.plot, drop.diff, treated.freqs.plot)
        df.bar <- barplot(plot.mat,
                          space = c(.71, .25),
                          ylim = c(0, .4),
                          names.arg = names,
                          col=c('#f0f0f0', 'black', '#636363'),
                          las = 1,
                          cex.axis = 1.5,
                          cex.names = 1.5)
        if(length(plots) == 0){
            x <- which(drop.diff != 0)
            y <- sum(drop.diff[x], control.freqs.plot[x], .01)
            text(df.bar[x] + .6, y + .02, 'Next Unit\n to Remove', cex = 1.5)
            text(df.bar[3] - .5, sum(drop.diff[1], control.freqs.plot[3], .015),
                 'Control', cex = 1.5)
            text(df.bar[4] + .1, sum(drop.diff[4], treated.freqs.plot[4], .015),
                 'Treatment', cex = 1.5)
        }
        if(length(plots) > 0){
          segments(x0 = df.bar - .49, y0 = apply(old.plot, 2, sum), x1 = df.bar + .49, y1 = apply(old.plot, 2, sum))
        }
        if(length(plots) == 1){
            text(df.bar[5] + .525, apply(old.plot, 2, sum)[5] + .08,
                 'Horizontal\n lines mark\n the heights\n of bars in\n the previous\n graph', cex = 1.5)
        }
        text(df.bar[9], .35,count, cex = 5)
        plots[[length(plots) + 1]] <- recordPlot()
        old.plot <- rbind(control.freqs.plot + drop.diff, treated.freqs.plot)
        control.freqs[i] <- control.freqs[i] - 1
        count <- count + 1
    }

    return(plots)
}
     
set.seed(40291)                   

control.freqs <- as.integer(runif(5, 0, 10))
treated.freqs <- as.integer(runif(5, 0, 10))

control.freqs[1] <- 5
control.freqs[3] <- 1
control.freqs[5] <- 6

frontier <- get.frontier(control.freqs, treated.freqs)
plots <- make.plots(frontier, control.freqs, treated.freqs)

for (p in 1:length(plots)) {
    fname <- paste0('L1_', p, '.pdf')
    pdf(fname)
    replayPlot(plots[[p]])
    dev.off()
}

pdf("frontierPlot.pdf")
par(mar = c(5,6,4,2) + 0.1)
plot(frontier$L1s,
     xlab = "Number of Observations Pruned",
     ylab = "",
     cex.lab = 1.5,
     cex.axis = 1.5,
     pch = 20,
     las = 1)
for(n in 1:length(frontier$L1s)){
    text(n + .1, frontier$L1s[n] + .001, n - 1, cex = 1.5)
}
title(ylab="L1 Difference", line=4, cex.lab=1.5)
dev.off()

###############
# Mahalanobis #
###############

movePoints <- function(x0y0, xy, d){
  total.dist <- apply(cbind(x0y0, xy), 1,
             function(x) stats::dist(rbind(x[1:2], x[3:4])))
  p <- d / total.dist
  p <- 1 - p
  x0y0[,1] <- xy[,1] + p*(x0y0[,1] - xy[,1])
  x0y0[,2] <- xy[,2] + p*(x0y0[,2] - xy[,2])
  return(x0y0)
}

####################################
# FUNCTIONS FOR DRAWING CURVED PLOTS
####################################

iArrows <- igraph:::igraph.Arrows

two.way.arrows <- function(group1, group2, d, ...){
    curvedFlag <- whichCurved(group1,group2[, c(3:4, 1:2)])
    shortened.arrows(group1[,1], group1[,2], group1[,3], group1[,4], d,
                     curvedFlag = curvedFlag[[1]], curve = .4, ...)
    shortened.arrows(group2[,1], group2[,2], group2[,3], group2[,4], d,
                     curvedFlag = curvedFlag[[2]], curve = .4, ...)
}

whichCurved <- function(a1,a2, d)
{
    c1 <- rep(0, nrow(a1))
    c2 <- rep(0, nrow(a2))
    a1.vec <- apply(a1, 1, paste, collapse = "")
    a2.vec <- apply(a2, 1, paste, collapse = "")
    c1[a1.vec %in% a2.vec] <- 1
    c2[a2.vec %in% a1.vec] <- 1
    return(list(c1, c2))
}

shortened.arrows <- function(x0, y0, x1, y1, d, curvedFlag, curve, ...){
    total.dist <- apply(cbind(x0, y0, x1, y1), 1,
                        function(x) stats::dist(rbind(x[1:2], x[3:4])))
    p <- d / total.dist
    p <- 1 - p
    x0 <- x1 + p*(x0 - x1)
    y0 <- y1 + p*(y0 - y1)
    x1 <- x0 + p*(x1 - x0)
    y1 <- y0 + p*(y1 - y0)
    arrows(x0[curvedFlag == 0], y0[curvedFlag == 0],
           x1[curvedFlag == 0], y1[curvedFlag == 0], ...)
    x1 <- x1 + .015*p*(y0 - y1)
    y1 <- y1 - .015*p*(x0 - x1)
    x0 <- x0 - .015*p*(y1 - y0)
    y0 <- y0 + .015*p*(x1 - x0)
    iArrows(x0[curvedFlag == 1], y0[curvedFlag == 1],
           x1[curvedFlag == 1], y1[curvedFlag == 1], curve = curve, size = 1)
}

###################################

set.seed(100)
# generate controls
X1 <- runif(5, 5, 15)
X2 <- runif(5, 5, 15)
control.dat <- data.frame(X1, X2)

control.dat <- control.dat[-4,]
control.dat[4,] <- c(10.5, 6.5)

# generate treateds
X1 <- runif(5, 0, 20)
X2 <- runif(5, 0, 20)
treated.dat <- data.frame(X1, X2)

df <- as.matrix(proxy::dist(x = control.dat, y = treated.dat))

# Calculate 1 and 2
A.j <- apply(df, 1, min) 
A.k <- apply(df, 2, min)

# Calculate 3 and 4
C <- sort(unname(c(A.j, A.k)), decreasing = TRUE)

A.j.indices <- apply(df, 1, function(x) which(x == min(x)))
A.k.indices <- apply(df, 2, function(x) which(x == min(x)))

vec3out <- c()
vec4out <- c()

for(j in 1:nrow(df)){
   rank <- which(C == A.j[j])[1] 
   vec3out <- c(vec3out, rank)
}

for(k in 1:ncol(df)){
   rank <- which(C == A.k[k])[1] 
   vec4out <- c(vec4out, rank)
}


frontierX <- unlist(lapply(unique(C), function(x) which(C == x)[1]))
frontierY <- unlist(lapply(frontierX, function(x) mean(C[x:length(C)])))

# Control lab points
control.lab.x <- frontierX[frontierX %in% vec3out]
control.lab.y <- frontierY[frontierX %in% vec3out]
    
treatment.lab.x <- frontierX[frontierX %in% vec4out]
treatment.lab.y <- frontierY[frontierX %in% vec4out]

gap.x <- treatment.lab.x[treatment.lab.x %in% control.lab.x]
gap.y <- treatment.lab.y[treatment.lab.x %in% control.lab.x]
treatment.labs <- treatment.lab.x
treatment.lab.x[treatment.lab.x %in% control.lab.x] <- treatment.lab.x[treatment.lab.x %in% control.lab.x] + .385
treatment.lab.y <- frontierY[frontierX %in% vec4out]

    
pdf('IllustrationWithNumbers.pdf')
plot(rbind(control.dat, treated.dat), type = "n", xlab = 'Covariate #1',
     ylab = 'Covariate #2', las = 1, cex.axis = 1.5, cex.lab = 1.5)
text(control.dat, paste0("C",as.character(vec3out)), cex = 2)
text(treated.dat, paste0("T",as.character(vec4out)), cex = 2)
group1 <- cbind(control.dat, treated.dat[A.j.indices,])
group2 <- cbind(treated.dat, control.dat[A.k.indices,])
two.way.arrows(group1, group2, d = .65)
dev.off()

pdf('IllustrationFrontier.pdf')
par(mar = c(5,6,4,2) + 0.1)
plot(frontierX, frontierY, pch = 16, xlim = c(1, 9), ylim = c(2.5, 4.5),
     xlab = 'Number of Observations Pruned',
     ylab = '', axes = FALSE, frame.plot=TRUE, cex.lab = 1.5)
title(ylab="Mean Mahalanobis Distance", line=4, cex.lab=1.5)
axis(side = 1, at = c(1:9), las = 1, cex.axis = 1.5)
axis(side = 2, at = seq(1.75, 4.5, .25), las = 1, cex.axis = 1.5)
text(control.lab.x + .3, control.lab.y + .1, paste0("C", control.lab.x), cex = 2)
text(treatment.lab.x + .55, treatment.lab.y + .1, paste0("T", treatment.labs), cex = 2)
text(gap.x + .65, gap.y + .04, ',', cex = 2)
dev.off()

#################
#################
# Data Analysis #
#################
#################

###########
# Lalonde #
###########

# Load data
lalonde <- read.csv('./data/lalonde.csv')

u74 <- as.integer(lalonde$re74 == 0)
u75 <- as.integer(lalonde$re75 == 0)

lalonde$u74 <- u74
lalonde$u75 <- u75
 
# Create vector of variables to match on
match.on <- colnames(lalonde)[!(colnames(lalonde) %in% c('re78', 'treat', 'X'))]

# Make frontier
L1.lalonde.frontier <- makeFrontier(dataset = lalonde,
                                    treatment = 'treat',
                                    outcome = 're78',
                                    match.on = match.on,
                                    QOI = 'SATT',
                                    metric = 'L1',
                                    ratio = 'fixed')

# Estimate effects
my.form <- as.formula(re78 ~ treat + age + black + education + hispanic +
                          married + nodegree + re74 + re75 + u74 + u75)
L1.lalonde.estimates <- estimateEffects(L1.lalonde.frontier, 're78 ~ treat',
                                        mod.dependence.formula = my.form,
                                        continuous.vars = c('age', 'education', 're74', 're75'),
                                        prop.estimated = .1,
                                        means.as.cutpoints = FALSE)

# Make plots
pdf('l1_lalonde_frontier.pdf')
par(mar=c(5,6,1,1))
plotFrontier(L1.lalonde.frontier,
             main = '',
             las=1,
             cex.lab = 1.4,
             cex.axis = 1.4,
             type = 'l',
             panel.first =
                 grid(NULL,
                      NULL,
                      lwd = 2)
             )
dev.off()

####################################################
# WRITE FUNCTIONS FOR LABELING LINES IN MEANS PLOT #
####################################################

range01 <- 
function (x) 
{
    if (min(x) == max(x)) {
        x <- rep(0, length(x))
        return(x)
    }
    (x - min(x))/(max(x) - min(x))
}

plotMeans.labelLines <- 
function (frontier.object, xlab = "Number of Observations Pruned", 
          main = "Means Plot", xlim = c(0, max(frontier.object$frontier$Xs)), 
          ylim = c(0, 1),
          cols = c("#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444", "#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444"),
          diff.in.means = FALSE, right.shift = 0,
          ylab.adjust.list = NULL, var.labs = NULL, cex.axis=1, ...)
{
    covs.mat <- matrix(nrow = 0, ncol = length(frontier.object$match.on), 
        byrow = FALSE)
    colnames(covs.mat) <- frontier.object$match.on
    for (col in frontier.object$match.on) {
        frontier.object$dataset[, colnames(frontier.object$dataset) == 
            col] <- range01(frontier.object$dataset[, colnames(frontier.object$dataset) == 
            col])
    }
    for (i in 1:length(frontier.object$frontier$Xs)) {
        this.dat.inds <- unlist(frontier.object$frontier$drop.order[i:length(frontier.object$frontier$drop.order)])
        dataset <- frontier.object$dataset[this.dat.inds, ]
        if (frontier.object$ratio == "weights") {
            w <- makeWeights(dataset, frontier.object$treatment)
            dataset$w <- w
            new.row <- c()
            for (col in colnames(covs.mat)) {
                if (diff.in.means) {
                  treated.inds <- dataset[[frontier.object$treatment]] == 
                    1
                  treated.mean <- weighted.mean(dataset[treated.inds, 
                    col], w = dataset$w[treated.inds])
                  control.inds <- dataset[[frontier.object$treatment]] == 
                    0
                  control.mean <- weighted.mean(dataset[control.inds, 
                    col], w = dataset$w[control.inds])
                  new.row <- c(new.row, abs(treated.mean - control.mean))
                }
                else {
                  new.row <- c(new.row, weighted.mean(dataset[, 
                    col], w = dataset$w))
                }
            }
            covs.mat <- rbind(covs.mat, new.row)
        }
        else {
            new.row <- c()
            for (col in colnames(covs.mat)) {
                if (diff.in.means) {
                  treated.inds <- dataset[[frontier.object$treatment]] == 
                    1
                  treated.mean <- mean(dataset[treated.inds, 
                    col])
                  control.inds <- dataset[[frontier.object$treatment]] == 
                    0
                  control.mean <- mean(dataset[control.inds, 
                    col])
                  new.row <- c(new.row, abs(treated.mean - control.mean))
                }
                else {
                  new.row <- c(new.row, mean(dataset[, col]))
                }
            }
            covs.mat <- rbind(covs.mat, new.row)
        }
    }
    if (!(exists("ylab"))) {
        if (diff.in.means) {
            ylab <- "Difference in scaled means"
        }
        else {
            ylab <- "Scaled means"
        }
    }
    x <- frontier.object$frontier$Xs
    plot(1, type = "n", xlim = xlim, ylim = ylim, xlab = xlab, 
        ylab = ylab, axes=F, ...)
    axis(2, cex.axis=cex.axis, las=1)
    axis(1,at = pretty(c(0,max(xlim)),n=4), cex.axis=cex.axis)
    box()
    ## individual y-axis adjustments for the variable labels
    adjust.vec <- rep(0,ncol(covs.mat))
    names(adjust.vec) <- colnames(covs.mat)
    if(!is.null(ylab.adjust.list)){
        tmp <- unlist(lapply(ylab.adjust.list,function(x){tmp <- as.numeric(x[2]);names(tmp)<-x[1];return(tmp)}))
        adjust.vec[names(tmp)] <- tmp
    }
    ## replace the labels with user specified labels
    if(is.null(var.labs)){var.labs <- colnames(covs.mat)}
    ## plot lines and labels
    for (i in 1:ncol(covs.mat)) {
        this.y <- covs.mat[, colnames(covs.mat)[i]]
        points(x, this.y, type = "l", col = cols[i])
        text(min(x) + right.shift, head(this.y, 1) + adjust.vec[i], var.labs[i], cex = 1.4, col = cols[i],pos=2,xpd=NA)
    }
}

pdf('l1_lalonde_means.pdf')
par(mar=c(5,4.2,1,1))
plotMeans.labelLines(L1.lalonde.frontier,
          ylim = c(0,.82),
          xlim = c(-11000, 16176),
          main = '',
          cols = c("gray30","gray33","gray36","gray39","gray57","gray45","gray48","gray51","gray54","gray42"),
          cex.lab = 1.4,
          cex.axis = 1.4,
          ylab.adjust.list=list(c("u74",.02),c("hispanic",-.01)),
          ## these are the variable names we matched on, but we want more informative labels
          #frontier.object$match.on
          var.labs = c("Age",
              "Education",
              "Black",
              "Hispanic",
              "Married",
              "No degree",
              "Income 1974",
              "Income 1975",
              "Unemployment 1974",
              "Unemployment 1975"),
          panel.first =
              grid(NULL,
                   NULL,
                   lwd = 2)
          )
dev.off()

plotPrunedMeans.labelLines <-
function (frontier.object, xlab = "Number of Observations Pruned", 
          main = "Means Plot", xlim = c(0, max(frontier.object$frontier$Xs)), 
          ylim = c(0, 1),
          labels.on.left = T,
          cols = c("#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444", "#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444"),
          diff.in.means = FALSE, right.shift = 0,
          ylab.adjust.list = NULL, var.labs = NULL, cex.axis =1, ...)
{
    covs.mat <- matrix(nrow = 0, ncol = length(frontier.object$match.on), 
        byrow = FALSE)
    colnames(covs.mat) <- frontier.object$match.on
    for (col in frontier.object$match.on) {
        frontier.object$dataset[, colnames(frontier.object$dataset) == 
            col] <- range01(frontier.object$dataset[, colnames(frontier.object$dataset) == 
            col])
    }
    for (i in 1:length(frontier.object$frontier$Xs)) {
        this.dat.inds <- unlist(frontier.object$frontier$drop.order[1:i])
        dataset <- frontier.object$dataset[this.dat.inds, ]
        new.row <- c()
        for (col in colnames(covs.mat)) {
            if (diff.in.means) {
                treated.inds <- dataset[[frontier.object$treatment]] == 
                  1
                treated.mean <- mean(dataset[treated.inds, col])
                control.inds <- dataset[[frontier.object$treatment]] == 
                  0
                control.mean <- mean(dataset[control.inds, col])
                new.row <- c(new.row, abs(treated.mean - control.mean))
            }
            else {
                new.row <- c(new.row, mean(dataset[, col]))
            }
        }
        covs.mat <- rbind(covs.mat, new.row)
    }    
    if (!(exists("ylab"))) {
        if (diff.in.means) {
            ylab <- "Difference in scaled means"
        }
        else {
            ylab <- "Scaled means"
        }
    }
    x <- frontier.object$frontier$Xs
    plot(1, type = "n", xlim = xlim, ylim = ylim, xlab = xlab, 
        ylab = ylab, axes=F, ...)
    axis(2, cex.axis=cex.axis, las=1)
    axis(1,at = pretty(c(0,max(xlim)),n=4), cex.axis=cex.axis)
    box()
    ## individual y-axis adjustments for the variable labels
    adjust.vec <- rep(0,ncol(covs.mat))
    names(adjust.vec) <- colnames(covs.mat)
    if(!is.null(ylab.adjust.list)){
        tmp <- unlist(lapply(ylab.adjust.list,function(x){tmp <- as.numeric(x[2]);names(tmp)<-x[1];return(tmp)}))
        adjust.vec[names(tmp)] <- tmp
    }
    ## replace the labels with user specified labels
    if(is.null(var.labs)){var.labs <- colnames(covs.mat)}
    ## plot lines and labels
    for (i in 1:ncol(covs.mat)) {
        this.y <- covs.mat[, colnames(covs.mat)[i]]
        points(x, this.y, type = "l", col = cols[i])
        if(labels.on.left==T){
          text(min(x) + right.shift, head(this.y, 1) + adjust.vec[i], var.labs[i], cex = 1.4, col = cols[i],pos=2,xpd=NA)
        } else {
          text(max(x) + right.shift, tail(this.y, 1) + adjust.vec[i], var.labs[i], cex = 1.4, col = cols[i],pos=4,xpd=NA)
        }   
    }
}

# For appendix
pdf('l1_lalonde_pruned_means.pdf')
par(mar=c(5,4.2,1,1))
plotPrunedMeans.labelLines(L1.lalonde.frontier,
          ylim = c(0,1),
          xlim = c(-11000, 16176),
          main = '',
          cols =  c("gray30",
              "gray33",
              "gray36",
              "gray39",
              "gray57",
              "gray45",
              "gray48",
              "gray51",
              "gray54",
              "gray42"),
          cex.lab = 1.4,
          cex.axis = 1.4,
          ylab.adjust.list=list(c("nodegree",.2),c("u74",.15),c("u75",.1),c("black",.04),c("re74",-.03),c("re75",-.04)),
          ## these are the variable names we matched on, but we want more informative labels
          # L1.lalonde.estimates$match.on
          var.labs = c("Age",
              "Education",
              "Black",
              "Hispanic",
              "Married",
              "No degree",
              "Income 1974",
              "Income 1975",
              "Unemployment 1974",
              "Unemployment 1975"),
          panel.first =
              grid(NULL,
                   NULL,
                   lwd = 2)
          )
dev.off()

pdf('l1_lalonde_estimates_full.pdf')
par(mar=c(5,6,1,1))
plotEstimates(L1.lalonde.estimates,
              main = '',
              ylab = "Estimate\n",
              cex.lab = 1.4,
              cex.axis = 1.4,
              las = 1,
              mod.dependence.col = "gray75",
              mod.dependence.border.col = "gray75",
              line.col = "black",
              panel.first =
                  grid(NULL,
                       NULL,
                       lwd = 2,
                       ),
              ylim = c(-10000, 2500)
              )
abline(a = 1794, b = 0, lty=2,lwd=2)
text(0,1400,"Experimental Estimate", pos=4, cex=1.4)
text(0,-7000,"Matching Estimate", pos=4, cex=1.4)
dev.off()

pdf('l1_lalonde_estimates_zoom.pdf')
par(mar=c(5,6,1,1))
plotEstimates(L1.lalonde.estimates,
              main = '',
              ylab = "Estimate\n",
              cex.lab = 1.4,
              cex.axis = 1.4,
              las = 1,
              mod.dependence.col = "gray75",
              mod.dependence.border.col = "gray75",
              line.col = "black",
              #xlim = c(15500, 16170),
              xlim = c(16170-200, 16170),
              panel.first =
                  grid(NULL,
                       NULL,
                       lwd = 2,
                       ),
              ylim = c(-10000, 2500)
              )
abline(a = 1794, b = 0, lty=2,lwd=2)
text(16170-200,1400,"Experimental Estimate", pos=4, cex=1.4)
text(16170-200,-3000,"Matching Estimate", pos=4, cex=1.4)
box()
dev.off()

##############
# Boyd et al #
##############

title7.race <- read.csv('./data/title7_race.csv')
title7.race$LiberalOutcome <- as.integer(title7.race$case_outcome == 'Liberal')
title7.race$Female <- as.integer(title7.race$gender_judge == 'Female')
title7.race$LiberalLowerDirection <- as.integer(title7.race$lower_dir == 'Liberal')
title7.race$Republican <- as.integer(title7.race$party_judge == 'Republican')
title7.race$Experienced <- as.integer(title7.race$jud_experience == 'Experienced')
title7.race$Minority <- as.integer(title7.race$race_judge != 'White')

title7.race$jcs <- title7.race$jcs - min(title7.race$jcs)
title7.race$confirm_yr <- title7.race$confirm_yr - min(title7.race$confirm_yr)
title7.race$Age <- title7.race$dec_year - title7.race$year_birth

# panel
case.order <- unique(title7.race$order)
median.ideo <- unlist(lapply(case.order, function(x) median(title7.race[title7.race$order == x,]$jcs)))
repub.majority <- as.integer(unlist(lapply(case.order, function(x) sum(title7.race[title7.race$order == x,]$Republican) > 1)))
has.minority <- as.integer(unlist(lapply(case.order, function(x) sum(title7.race[title7.race$order == x,]$Minority) > 0)))
maj.experienced <- as.integer(unlist(lapply(case.order, function(x) sum(title7.race[title7.race$order == x,]$Experienced) > 1)))
has.woman <- as.integer(unlist(lapply(case.order, function(x)
    sum(title7.race[title7.race$order == x,]$gender_judge == 'Female') > 0)))
liberal.lower.direction <- unlist(lapply(case.order, function(x)
    unique(title7.race[title7.race$order == x,]$LiberalLowerDirection)))
median.age <- unlist(lapply(case.order, function(x) median(title7.race[title7.race$order == x,]$Age)))
median.confirmation.year <- unlist(lapply(case.order, function(x) median(title7.race[title7.race$order == x,]$confirm_yr)))
ideo.range <- unlist(lapply(case.order, function(x) diff(range(title7.race[title7.race$order == x,]$jcs))))
minorityXmedianIdeo <- median.ideo * has.minority
liberalOutcome <- unlist(lapply(case.order, function(x) unique(title7.race[title7.race$order == x,]$LiberalOutcome)))

new.title7.race.data <- data.frame(
    case.order = case.order,
    median.ideo = median.ideo,
    ideo.range = ideo.range,
    repub.majority = repub.majority,
    has.minority = has.minority,
    maj.experienced = maj.experienced,
    median.age = median.age,
    liberal.lower.direction = liberal.lower.direction,
    has.woman = has.woman,
    liberalOutcome = liberalOutcome)

treatment <- 'has.woman'
outcome <- 'liberalOutcome'

match.on.title7.race <- c('median.ideo', 'median.age', 'repub.majority', 'has.minority',
                  'maj.experienced', 'liberal.lower.direction')
title7.race.formula <- as.formula('liberalOutcome ~ has.woman + median.ideo + median.age + repub.majority +
                                            has.minority + maj.experienced + liberal.lower.direction')

title7.race.panel.frontier <- makeFrontier(dataset = new.title7.race.data,
                                           treatment = treatment,
                                           outcome = outcome,
                                           match.on = match.on.title7.race,
                                           metric = "Mahal"
                                           )

title7.race.panel.estimates <- estimateEffects(title7.race.panel.frontier,
                                               'liberalOutcome ~ has.woman',
                                               mod.dependence.formula = title7.race.formula,
                                               continuous.vars = c('median.ideo', 'median.age'),
                                               seed = 02138,
                                               prop.estimated = 1,
                                               means.as.cutpoints = FALSE)

pdf('title7_race_panel_frontier.pdf')
par(mar=c(5,6,1,1))
plotFrontier(title7.race.panel.frontier,
             main = '',
             ylab = "Mahalanobis Discrepancy",
             cex.lab = 1.4,
             cex.axis = 1.4,
             type = 'l',
             panel.first =
             grid(NULL,
                  NULL,
                  lwd = 2)
             )
dev.off()

pdf('title7_race_panel_estimates.pdf')
par(mar=c(5,6,1,1))
plotEstimates(title7.race.panel.estimates,
              main = '',
              ylab = "Estimate\n",
              cex.lab = 1.4,
              cex.axis = 1.4,
              las = 1,
              mod.dependence.col = "gray75",
              mod.dependence.border.col = "gray75",
              line.col = "black",
              panel.first =
              grid(NULL,
                   NULL,
                   lwd = 2,
                   ),
              ylim = c(0, .3),
              xlim = c(0, 275)
              )
abline(0,0, col = 'black', lty = 2)
dev.off()

pdf('title7_race_panel_means.pdf')
par(mar=c(5,4.2,1,1))
plotMeans.labelLines(title7.race.panel.frontier,
                     ylim = c(0,.82),
                     xlim = c(-230, 310),
                     main = '',
                     cex.lab = 1.4,
                     cex.axis = 1.4,
                     ## these are the variable names we matched on, but we want more informative labels
                     # title7.race.panel.frontier$match.on
                     cols = c("gray30","gray35","gray55","gray45","gray50","gray40"),
                     var.labs = c("Median Ideology",
                         "Median Age",
                         "Republican Majority",
                         "Has Minority",
                         "Majority Experienced",
                         "Liberal Lower Direction"),
                     panel.first =
                     grid(NULL,
                          NULL,
                          lwd = 2),
                     )
dev.off()

pdf('title7_race_panel_pruned_means.pdf')
par(mar=c(5,4.2,1,1))
plotPrunedMeans.labelLines(title7.race.panel.frontier,
                     ylim = c(0,1),
                     xlim = c(0, 540),
                     main = '',
                     labels.on.left=F,
                     cex.lab = 1.4,
                     cex.axis = 1.4,
                     ## these are the variable names we matched on, but we want more informative labels
                     # title7.race.panel.frontier$match.on
                     cols = c("gray30","gray35","gray55","gray45","gray50","gray40"),
                     var.labs = c("Median Ideology",
                         "Median Age",
                         "Republican Majority",
                         "Has Minority",
                         "Majority Experienced",
                         "Liberal Lower Direction"),
                     panel.first =
                     grid(NULL,
                          NULL,
                          lwd = 2),
                     )
dev.off()

##############
# SIMULATION #
##############

set.seed(1)

covars <- 2
X <- matrix(nrow=10000, ncol=covars)

for(i in 1:covars){
    X[,i] <- c(runif(n=5000, min=0, max=5), runif(n=5000, min=1, max=6))
}

colnames(X) <- paste('X', 1:ncol(X), sep = '')
treat <- c(rep(0, 5000), rep(1, 5000))
Y <- 2 * treat + apply(X, 1, sum) + rnorm(n=10000, mean=0, sd=1)

pdf("sim_dist.pdf")
plot(X[1:5000,1], X[1:5000,2],
     col = alpha("gray75", 0.6),
     pch = 16,
     xlim = c(0,6),
     ylim = c(0,6),
     xlab = "X1",
     ylab = "X2",
     panel.first =
         grid(NULL,
              NULL,
              lwd = 2))
points(X[5001:10000,1], X[5001:10000,2],
       col = alpha("black", 0.3))
dev.off()

sim.dat <- data.frame(Y = Y,
                      treat = treat,
                      X1 = X[,1],
                      X2 = X[,2]
                      )

sim.frontier <- makeFrontier(dataset = sim.dat,
                             treatment = "treat",
                             outcome = "Y",
                             match.on = c("X1", "X2"),
                             metric = "Mahal"
                             )

sim.estimates <- estimateEffects(sim.frontier,
                                 'Y ~ treat',
                                 model.dependence.ests = 100)
 
pdf('sim_frontier.pdf')
par(mar=c(5,6,1,1))
plotFrontier(sim.frontier,
             main = '',
             ylab="Mahalanobis Discrepancy\n",
             cex.lab = 1.4,
             cex.axis = 1.4,
             type = 'l',
             las = 1,
             panel.first =
                 grid(NULL,
                      NULL,
                      lwd = 2)
             )
dev.off()

pdf('sim_estimates.pdf')
par(mar=c(5,5,1,1))
plotEstimates(sim.estimates,
              main = '',
              ylab = "Estimate",
              cex.lab = 1.4,
              cex.axis = 1.4,
              las = 1,
              mod.dependence.col = "gray75",
              mod.dependence.border.col = "gray75",
              line.col = "black",
              panel.first =
                  grid(NULL,
                       NULL,
                       lwd = 2,
                       )
              )
dev.off()

plotMeans.sims <- 
function (frontier.object, xlab = "Number of Observations Pruned", 
          main = "Means Plot", xlim = c(1, max(frontier.object$frontier$Xs) + 300), 
          ylim = c(0, 3.5),
          cols = c("#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444", "#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444"),
          diff.in.means = TRUE, right.shift = 350, ...)
{
    covs.mat <- matrix(nrow = 0, ncol = length(frontier.object$match.on), 
        byrow = FALSE)
    colnames(covs.mat) <- frontier.object$match.on
    for (i in 1:length(frontier.object$frontier$Xs)) {
        this.dat.inds <- unlist(frontier.object$frontier$drop.order[i:length(frontier.object$frontier$drop.order)])
        dataset <- frontier.object$dataset[this.dat.inds, ]
        if (frontier.object$ratio == "weights") {
            w <- makeWeights(dataset, frontier.object$treatment)
            dataset$w <- w
            new.row <- c()
            for (col in colnames(covs.mat)) {
                if (diff.in.means) {
                  treated.inds <- dataset[[frontier.object$treatment]] == 
                    1
                  treated.mean <- weighted.mean(dataset[treated.inds, 
                    col], w = dataset$w[treated.inds])
                  control.inds <- dataset[[frontier.object$treatment]] == 
                    0
                  control.mean <- weighted.mean(dataset[control.inds, 
                    col], w = dataset$w[control.inds])
                  new.row <- c(new.row, abs(treated.mean - control.mean))
                }
                else {
                  new.row <- c(new.row, weighted.mean(dataset[, 
                    col], w = dataset$w))
                }
            }
            covs.mat <- rbind(covs.mat, new.row)
        }
        else {
            new.row <- c()
            for (col in colnames(covs.mat)) {
                if (diff.in.means) {
                  treated.inds <- dataset[[frontier.object$treatment]] == 
                    1
                  treated.mean <- mean(dataset[treated.inds, 
                    col])
                  control.inds <- dataset[[frontier.object$treatment]] == 
                    0
                  control.mean <- mean(dataset[control.inds, 
                    col])
                  new.row <- c(new.row, abs(treated.mean - control.mean))
                }
                else {
                  new.row <- c(new.row, mean(dataset[, col]))
                }
            }
            covs.mat <- rbind(covs.mat, new.row)
        }
    }
    if (!(exists("ylab"))) {
        if (diff.in.means) {
            ylab <- "Difference in unscaled means"
        }
        else {
            ylab <- "Unscaled means"
        }
    }
    x <- frontier.object$frontier$Xs
    plot(1, type = "n", xlim = xlim, ylim = ylim, xlab = xlab, 
        ylab = ylab, ...)
    for (i in 1:ncol(covs.mat)) {
        adjust <- 0
        if(diff.in.means){
            if(colnames(covs.mat)[i] == c('X1')){
                adjust <- .07
            }        
            if(colnames(covs.mat)[i] == c('X2')){
                adjust <- -.02
            } 
        } else {
            if(colnames(covs.mat)[i] == c('X1')){
                adjust <- -.85
            }        
            if(colnames(covs.mat)[i] == c('X2')){
                adjust <- -.95
            }
        }
        this.y <- covs.mat[, colnames(covs.mat)[i]]
        points(x, this.y, type = "l", col = cols[i])
        text(max(x) + right.shift, tail(this.y, 1) + adjust, colnames(covs.mat)[i], cex = 1.4, col = cols[i])
    }
}

pdf('sim_diff_in_means.pdf')
plotMeans.sims(sim.frontier,
               main = '',
               cex.lab = 1.4,
               cex.axis = 1.4,
               las = 1,
               panel.first =
                   grid(NULL,
                        NULL,
                        lwd = 2),
               ylim = c(0, 1.5),
               col = c("black", "gray40")
               )
dev.off()

pdf('sim_overall_means.pdf')
plotMeans.sims(sim.frontier,
               main = '',
               cex.lab = 1.4,
               cex.axis = 1.4,
               las = 1,
               panel.first =
                   grid(NULL,
                        NULL,
                        lwd = 2),
               ylim = c(2, 4),
               diff.in.means = FALSE,
               col = c("black", "gray40")
               )
dev.off()

plotPrunedMeans.sims <- 
function (frontier.object, xlab = "Number of Observations Pruned", 
          main = "Means Plot", xlim = c(1, max(frontier.object$frontier$Xs) + 300), 
          ylim = c(0, 3.5),
          cols = c("#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444", "#33B5E5", "#AA66CC", "#99CC00", "#FFBB33", "#FF4444"),
          diff.in.means = TRUE, right.shift = 350, ...)
{
    covs.mat <- matrix(nrow = 0, ncol = length(frontier.object$match.on), 
        byrow = FALSE)
    colnames(covs.mat) <- frontier.object$match.on
    for (i in 1:length(frontier.object$frontier$Xs)) {
        this.dat.inds <- unlist(frontier.object$frontier$drop.order[1:i])
        dataset <- frontier.object$dataset[this.dat.inds, ]
        if (frontier.object$ratio == "weights") {
            w <- makeWeights(dataset, frontier.object$treatment)
            dataset$w <- w
            new.row <- c()
            for (col in colnames(covs.mat)) {
                if (diff.in.means) {
                  treated.inds <- dataset[[frontier.object$treatment]] == 
                    1
                  treated.mean <- weighted.mean(dataset[treated.inds, 
                    col], w = dataset$w[treated.inds])
                  control.inds <- dataset[[frontier.object$treatment]] == 
                    0
                  control.mean <- weighted.mean(dataset[control.inds, 
                    col], w = dataset$w[control.inds])
                  new.row <- c(new.row, abs(treated.mean - control.mean))
                }
                else {
                  new.row <- c(new.row, weighted.mean(dataset[, 
                    col], w = dataset$w))
                }
            }
            covs.mat <- rbind(covs.mat, new.row)
        }
        else {
            new.row <- c()
            for (col in colnames(covs.mat)) {
                if (diff.in.means) {
                  treated.inds <- dataset[[frontier.object$treatment]] == 
                    1
                  treated.mean <- mean(dataset[treated.inds, 
                    col])
                  control.inds <- dataset[[frontier.object$treatment]] == 
                    0
                  control.mean <- mean(dataset[control.inds, 
                    col])
                  new.row <- c(new.row, abs(treated.mean - control.mean))
                }
                else {
                  new.row <- c(new.row, mean(dataset[, col]))
                }
            }
            covs.mat <- rbind(covs.mat, new.row)
        }
    }
    if (!(exists("ylab"))) {
        if (diff.in.means) {
            ylab <- "Difference in unscaled means"
        }
        else {
            ylab <- "Unscaled means"
        }
    }
    x <- frontier.object$frontier$Xs
    plot(1, type = "n", xlim = xlim, ylim = ylim, xlab = xlab, 
        ylab = ylab, ...)
    for (i in 1:ncol(covs.mat)) {
        adjust <- 0
        if(diff.in.means){
            if(colnames(covs.mat)[i] == c('X1')){
                adjust <- .07
            }        
            if(colnames(covs.mat)[i] == c('X2')){
                adjust <- -.03
            } 
        } else {
            if(colnames(covs.mat)[i] == c('X1')){
                adjust <- .1
            }        
            if(colnames(covs.mat)[i] == c('X2')){
                adjust <- -.05
            }
        }
        this.y <- covs.mat[, colnames(covs.mat)[i]]
        points(x, this.y, type = "l", col = cols[i])
        text(max(x) + right.shift, tail(this.y, 1) + adjust, colnames(covs.mat)[i], cex = 1.4, col = cols[i])
    }
}

pdf('sim_pruned_means.pdf')
plotPrunedMeans.sims(sim.frontier,
               main = '',
               cex.lab = 1.4,
               cex.axis = 1.4,
               las = 1,
               panel.first =
                   grid(NULL,
                        NULL,
                        lwd = 2),
               ylim = c(2, 4),
               diff.in.means = FALSE,
               col = c("black", "gray40")
               )
dev.off()
